We will clean the environment, setup the locations, define colors, and create a datestamp.
Clean the environment.
Set locations and working directories…
Create a new analysis directories.
- general directory
- for plots
- for output of summary results
- for baseline tables
- for genetic analyses
- for Cox regression results
… a package-installation function …
source("scripts/functions.R")… and load those packages.
source("scripts/packages05.R")We will create a datestamp and define the Utrecht Science Park Colour Scheme.
Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
source("scripts/colors.R")We will parse the data to create regional association plots for each of the 11 loci.
Here just making a heatmap of the colors.
library("scales")
Attaching package: ‘scales’
The following object is masked from ‘package:purrr’:
discard
The following object is masked from ‘package:readr’:
col_factor
pal_npg("nrc")(10) [1] "#E64B35FF" "#4DBBD5FF" "#00A087FF" "#3C5488FF" "#F39B7FFF" "#8491B4FF" "#91D1C2FF" "#DC0000FF" "#7E6148FF" "#B09C85FF"
show_col(pal_npg("nrc")(10))
# show_col(pal_npg("nrc", alpha = 0.6)(10))We are interested in 11 top loci. We will plot these using the EU-AA-ancestry data.
library(openxlsx)
variant_list <- read.xlsx(paste0(TARGET_loc, "/Variants.xlsx"), sheet = "TopLoci")
head(variant_list)NALet’s do some plotting.
variants_of_interest <- c(variant_list$rsID)
variants_of_interest [1] "rs9349379" "rs3844006" "rs2854746" "rs4977575" "rs10899970" "rs9633535" "rs10762577" "rs11063120" "rs9515203" "rs7182103" "rs7412"
length(variants_of_interest)[1] 11
We need to load the meta-analysis summary statistics from the European - African-American ancestry analysis first.
gwas_sumstats_racer_EA_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_complete_racer.EA_AA.rds"))library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER")),
dir.create(file.path(PROJECT_loc, "/RACER")),
FALSE)[1] FALSE
RACER_loc = paste0(PROJECT_loc,"/RACER")
variants_of_interest_fewgenes <- c("rs10899970") # "rs9349379", "rs3844006", "rs2854746", "rs4977575", "rs10899970", "rs9633535", "rs10762577", "rs11063120", "rs9515203", "rs7182103", "rs7412"
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "coord", snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = TRUE, gene_track_h = 2, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9349379...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9349379&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 22585 bytes...Merging input association data with LD...
Plotting region surrounding rs9349379 on 6:12403957-13403957.
Plotting by...
coord
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs9349379.
Getting data for rs3844006.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3844006&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 10095 bytes...
Plotting region surrounding rs3844006 on 6:131595002-132595002.
Saving image for rs3844006.
Getting data for rs2854746.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2854746&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 33816 bytes...
Plotting region surrounding rs2854746 on 7:45460645-46460645.
Saving image for rs2854746.
Getting data for rs4977575.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 8868 bytes...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Saving image for rs4977575.
Getting data for rs10899970.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 47436 bytes...
Plotting region surrounding rs10899970 on 10:44015716-45015716.
Saving image for rs10899970.
Getting data for rs9633535.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9633535&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 15737 bytes...
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Saving image for rs9633535.
Getting data for rs10762577.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 27713 bytes...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Saving image for rs10762577.
Getting data for rs11063120.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 13859 bytes...
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Saving image for rs11063120.
Getting data for rs9515203.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9515203&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 9860 bytes...
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Saving image for rs9515203.
Getting data for rs7182103.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7182103&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 36361 bytes...
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Saving image for rs7182103.
Getting data for rs7412.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 12630 bytes...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Saving image for rs7412.
These are genetic loci with many genes.
variants_of_interest_manygenes <- c("rs7412", "rs10762577")
for(VARIANT in variants_of_interest_manygenes){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "snp", snp_plot = VARIANT,
label_lead = TRUE, gene_track_h = 0.75, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs7412.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=EUR&r2_d=r2&token=c0f613f149ab"
Merging input association data with LD...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Plotting by...
snp rs7412
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs7412.
Getting data for rs10762577.
Subset required data.
Formatting association data.
Getting LD data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 16159 bytes...
Downloaded 27713 bytes...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Saving image for rs10762577.
The CXCL12 genetic locus.
variants_of_interest_cxcl12 <- c("rs10899970")
for(VARIANT in variants_of_interest_cxcl12){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19", set = "all",
plotby = "snp", snp_plot = VARIANT,
label_lead = TRUE, gene_track_h = 0.75, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 47436 bytes...Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45015716.
Plotting by...
snp rs10899970
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10899970.
We want to create some regional association plots to combine with teh UCSC browser tracks, thus we need the exact same regions.
library(openxlsx)
add_list <- read.xlsx(paste0(TARGET_loc, "/Variants.xlsx"), sheet = "AdditionalPlots")
DT::datatable(add_list)NAWe want to color the credible sets, which we load here.
credset <- as_tibble(fread(paste0(PROJECT_loc, "/CredibleSets/CAC_EUR_AFR_cred_set_all_loci_50kb.txt")))
credsetWe want to add the posterior probabilities and make a variable to color by.
gwas_sumstats_racer_credset <- merge(gwas_sumstats_racer_EA_AA,
credset %>% select(RSID, Posterior_Prob),
sort = FALSE,
by.x = "rsID", by.y = "RSID", all.x = TRUE) %>%
# mutate(., Posterior_Prob = ifelse(is.na(Posterior_Prob), 0, Posterior_Prob)) %>%
mutate(CredSet = case_when(Posterior_Prob > 0 ~ '95% credible set',
TRUE ~ 'not in credible set'))
head(gwas_sumstats_racer_credset)
table(gwas_sumstats_racer_credset$CredSet)
95% credible set not in credible set
103 8585944
summary(gwas_sumstats_racer_credset$Posterior_Prob) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0 0 0 0 0 1 8585944
library(RACER)
# library(plotly)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER")),
dir.create(file.path(PROJECT_loc, "/RACER")),
FALSE)[1] FALSE
RACER_loc = paste0(PROJECT_loc,"/RACER")
variants_of_interest <- c(add_list$rsID)
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(add_list, rsID == VARIANT)[,4]
tempSTART <- subset(add_list, rsID == VARIANT)[,5]
tempEND <- subset(add_list, rsID == VARIANT)[,6]
tempNAME <- subset(add_list, rsID == VARIANT)[,3]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_credset, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
# temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f,
chr = tempCHR, build = "hg19",
plotby = "coord", snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = FALSE,
grey_colors = FALSE,
cred_set = TRUE,
gene_track_h = 3, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.credset.png"), plot = p1)
ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.credset.pdf"), plot = p1)
ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.credset.eps"), plot = p1)
# print(ggplotly(p1))
rm(temp, p1,
temp_f,
tempCHR, tempSTART, tempEND,
VARIANT, tempNAME)
}Getting data for rs9633535.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
Plotting region surrounding rs9633535 on 10:63584853-63921073.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Collecting posterior probabilities
Generating Plot
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Saving image for rs9633535.
Getting data for rs2854746.
Subset required data.
Formatting association data.
Getting LD data.
Plotting region surrounding rs2854746 on 7:45894617-46054070.
Saving image for rs2854746.
Getting data for rs3844006.
Subset required data.
Formatting association data.
Getting LD data.
Plotting region surrounding rs3844006 on 6:131937915-132289374.
Saving image for rs3844006.
Note here that we plot the region, and not based on the lead variant of the EA-AA analyses.
We need to load the meta-analysis summary statistics from the African-American-only ancestry analysis.
# gwas_sumstats_unfiltered_AA <- fread(paste0(GWAS_loc,"/CAC1000G_AA_FINAL_FUMA.unfiltered.txt.gz"),
# showProgress = TRUE)
# saveRDS(gwas_sumstats_unfiltered_AA, file = paste0(OUT_loc, "/gwas_sumstats_unfiltered.AA.rds"))
# gwas_sumstats_racer_unfiltered_AA <- subset(gwas_sumstats_unfiltered_AA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_unfiltered_AA, file = paste0(OUT_loc, "/gwas_sumstats_unfiltered_racer.AA.rds"))
#
# gwas_sumstats_racer_unfiltered_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_unfiltered_racer.AA.rds"))
#
# gwas_sumstats_AA <- fread(paste0(GWAS_loc,"/CAC1000G_AA_FINAL_FUMA.filtered.txt.gz"),
# showProgress = TRUE)
# saveRDS(gwas_sumstats_AA, file = paste0(OUT_loc, "/gwas_sumstats.AA.rds"))
#
# gwas_sumstats_racer_AA <- subset(gwas_sumstats_AA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_AA, file = paste0(OUT_loc, "/gwas_sumstats_racer.AA.rds"))
# rm(gwas_sumstats_AA)
gwas_sumstats_racer_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_racer.AA.rds"))library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER_AA")),
dir.create(file.path(PROJECT_loc, "/RACER_AA")),
FALSE)[1] FALSE
RACER_AA_loc = paste0(PROJECT_loc,"/RACER_AA")
# Plotting is handled a bit differently
# "rs3844006", # throws an error which I don't understand immediately - could be that the variant is not present in AA 1000G data
# "rs9633535", # this one throws an LD error
# "rs9349379", "rs3844006", "rs2854746",
variants_of_interest_fewgenes_aa <- c("rs10899970", "rs9633535", "rs10762577", "rs11063120", "rs9515203", "rs7182103", "rs7412")
for(VARIANT in variants_of_interest_fewgenes_aa){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "AFR",
lead_snp = "rs4576508",
auto_snp = FALSE),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "coord",
snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = TRUE, gene_track_h = 2, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.png"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4576508...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45015716.
Plotting by...
coord
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10899970.
Getting data for rs9633535.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Saving image for rs9633535.
Getting data for rs10762577.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Saving image for rs10762577.
Getting data for rs11063120.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Saving image for rs11063120.
Getting data for rs9515203.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Saving image for rs9515203.
Getting data for rs7182103.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Saving image for rs7182103.
Getting data for rs7412.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Plotting region surrounding rs7412 on 19:44912079-45912079.
Saving image for rs7412.
# chr9-rs4977575: rs7470682 is the most significant but not present in LDlink; alternative rs4576508 is present and plotted
variants_of_interest_fewgenes <- c("rs4977575")
for(VARIANT in variants_of_interest_fewgenes){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "AFR",
lead_snp = "rs4576508",
auto_snp = FALSE),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "coord",
snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = TRUE, gene_track_h = 2, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.png"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs4977575.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4576508...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Merging input association data with LD...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Plotting by...
coord
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs4977575.
The idea behind the Approximate Bayes Factor (ABF) analysis is that the association of each trait with SNPs in a region may be summarized by a vector of 0s and at most a single 1, with the 1 indicating the causal SNP (so, assuming a single causal SNP for each trait).
The posterior probability of each possible configuration can be calculated and so, crucially, can the posterior probabilities that the traits share their configurations. This allows us to estimate the support for the following cases, i.e. hypotheses:
To what extent do the loci between European and African-American ancestries overlap? We are working on the assumption that the 11 loci are the loci and will test whether these overlap, i.e. colocalize.
Let’s make sure we have remotes and coloc
installed.
if(!require("remotes"))
install.packages("remotes") # if necessaryLoading required package: remotes
Attaching package: ‘remotes’
The following objects are masked from ‘package:devtools’:
dev_package_deps, install_bioc, install_bitbucket, install_cran, install_deps, install_dev, install_git, install_github, install_gitlab,
install_local, install_svn, install_url, install_version, update_packages
The following object is masked from ‘package:usethis’:
git_credentials
library(remotes)
install_github("chr1swallace/coloc@main",
build_vignettes = TRUE)Using github PAT from envvar GITHUB_PAT
Skipping install of 'coloc' from a github remote, the SHA1 (9e2632fb) has not changed since last install.
Use `force = TRUE` to force installation
library(coloc)This is coloc version 5.2.1
We need to load the meta-analysis summary statistics from the European-only ancestry analysis.
# gwas_sumstats_EA <- fread(paste0(GWAS_loc,"/CAC1000G_EA_FINAL_FUMA.txt.gz"),
# showProgress = TRUE)
# names(gwas_sumstats_EA)[names(gwas_sumstats_EA) == "Pos"] <- "Position"
# saveRDS(gwas_sumstats_EA, file = paste0(OUT_loc, "/gwas_sumstats.EA.rds"))
#
# gwas_sumstats_racer_EA <- subset(gwas_sumstats_EA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_EA, file = paste0(OUT_loc, "/gwas_sumstats_racer.EA.rds"))
# rm(gwas_sumstats_EA)
gwas_sumstats_racer_EA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_racer.EA.rds"))We can create mirror and scatter plot for each region.
library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER_EA_vs_AA")),
dir.create(file.path(PROJECT_loc, "/RACER_EA_vs_AA")),
FALSE)[1] FALSE
RACER_EA_vs_AA_loc = paste0(PROJECT_loc,"/RACER_EA_vs_AA")
variants_of_interest <- c(variant_list$rsID)
# variants_of_interest_fewgenes <- c("rs9349379",
# "rs3844006", # throws an error which I don't understand immediately - could be that the variant is not present in AA 1000G data
# "rs2854746", "rs4977575",
# "rs10899970",
# "rs9633535",
# "rs10762577",
# "rs11063120", "rs9515203", "rs7182103",
# "rs7412")
# "rs9349379", "rs3844006", "rs2854746",
variants_of_interest_fewgenes_aa <- c("rs10899970", "rs9633535", "rs10762577", "rs11063120", "rs9515203", "rs7182103", "rs7412")
for(VARIANT in variants_of_interest_fewgenes_aa){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp1 <- subset(gwas_sumstats_racer_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
temp2 <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f1 = RACER::formatRACER(assoc_data = temp1, chr_col = 3, pos_col = 4, p_col = 5)
temp_f2 = RACER::formatRACER(assoc_data = temp2, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld1 =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f1, rs_col = 2, pops = "EUR",
# lead_snp = VARIANT,
auto_snp = TRUE),
LD)
temp_f_ld2 =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f2, rs_col = 2, pops = "AFR",
# lead_snp = VARIANT,
auto_snp = TRUE),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- mirrorPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
plotby = "coord",
start_plot = tempSTART, end_plot = tempEND,
label_lead = TRUE)
print(p1)
p2 <- scatterPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
region_start = tempSTART,
region_end = tempEND,
ld_df = 1,
label = TRUE)
print(p2)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.png"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.pdf"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.eps"), plot = p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.png"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.pdf"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.eps"), plot = p2)
rm(temp1, temp2,
p1,p2,
temp_f1, temp_f_ld1,
temp_f2, temp_f_ld2,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs144399321...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs144399321&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 31975 bytes...Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs800310...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs800310&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 38238 bytes...Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45015716.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Saving image for rs10899970.
Saving image for rs10899970.
Getting data for rs9633535.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10821952&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 14830 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs115765687&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 10921 bytes...
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Saving image for rs9633535.
Saving image for rs9633535.
Getting data for rs10762577.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9664463&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 36044 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10824116&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 55175 bytes...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Saving image for rs10762577.
Saving image for rs10762577.
Getting data for rs11063120.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 13859 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2358727&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 17524 bytes...
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Saving image for rs11063120.
Saving image for rs11063120.
Getting data for rs9515203.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9559782&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 9380 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs6492260&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 12418 bytes...
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Saving image for rs9515203.
Saving image for rs9515203.
Getting data for rs7182103.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4887109&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 39518 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3861180&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 22990 bytes...
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Saving image for rs7182103.
Saving image for rs7182103.
Getting data for rs7412.
Subset required data.
Formatting association data.
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs41290120&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 17369 bytes...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs141622900&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 9858 bytes...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Saving image for rs7412.
Saving image for rs7412.
# # chr9-rs4977575: rs7470682 is the most significant but not present in LDlink; alternative rs4576508 is present and plotted
# variants_of_interest_fewgenes_aa_9p21 <- c("rs4977575")
#
# for(VARIANT in variants_of_interest_fewgenes_aa_9p21){
# cat(paste0("Getting data for ", VARIANT,".\n"))
#
# tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
# tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
# tempEND <- subset(variant_list, rsID == VARIANT)[,19]
# tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
#
# cat("\nSubset required data.\n")
# temp1 <- subset(gwas_sumstats_racer_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
# temp2 <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
#
# cat("\nFormatting association data.\n")
# temp_f1 = RACER::formatRACER(assoc_data = temp1, chr_col = 3, pos_col = 4, p_col = 5)
# temp_f2 = RACER::formatRACER(assoc_data = temp2, chr_col = 3, pos_col = 4, p_col = 5)
#
# # cat("\nGetting LD data.\n")
# temp_f_ld1 =
# data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
# RACER::ldRACER(assoc_data = temp_f1, rs_col = 2, pops = "EUR",
# # lead_snp = VARIANT,
# auto_snp = TRUE),
# LD)
#
# temp_f_ld2 =
# data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
# RACER::ldRACER(assoc_data = temp_f2, rs_col = 2, pops = "AFR",
# # lead_snp = VARIANT,
# auto_snp = TRUE),
# LD)
# cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
#
# p1 <- mirrorPlotRACER(assoc_data1 = temp_f_ld1,
# assoc_data2 = temp_f_ld2,
# chr = tempCHR,
# name1 = "European ancestry",
# name2 = "African-American ancestry",
# plotby = "coord",
# start_plot = tempSTART, end_plot = tempEND,
# label_lead = TRUE)
#
# print(p1)
#
# p2 <- scatterPlotRACER(assoc_data1 = temp_f_ld1,
# assoc_data2 = temp_f_ld2,
# chr = tempCHR,
# name1 = "European ancestry",
# name2 = "African-American ancestry",
# region_start = tempSTART,
# region_end = tempEND,
# ld_df = 1,
# label = TRUE)
#
# print(p2)
#
# cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.png"), plot = p1)
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.pdf"), plot = p1)
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.eps"), plot = p1)
#
# cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.png"), plot = p2)
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.pdf"), plot = p2)
# ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.eps"), plot = p2)
#
#
# rm(temp1, temp2,
# p1,p2,
# temp_f1, temp_f_ld1,
# temp_f2, temp_f_ld2,
# tempCHR, tempSTART, tempEND,
# VARIANT, tempVARIANTnr)
#
# }
# chr9-rs4977575: rs7470682 is the most significant but not present in LDlink; alternative rs4576508 is present and plotted for AA ONLY!!!
variants_of_interest_fewgenes_aa_9p21 <- c("rs4977575")
for(VARIANT in variants_of_interest_fewgenes_aa_9p21){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp1 <- subset(gwas_sumstats_racer_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
temp2 <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f1 = RACER::formatRACER(assoc_data = temp1, chr_col = 3, pos_col = 4, p_col = 5)
temp_f2 = RACER::formatRACER(assoc_data = temp2, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld1 =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f1, rs_col = 2, pops = "EUR",
# lead_snp = VARIANT,
auto_snp = TRUE),
LD)
temp_f_ld2 =
data.table::setorder( # this fixes an issue where the SNPs with LD = NA are plotted last and it appears many SNPs are not present in 1000G.
RACER::ldRACER(assoc_data = temp_f2, rs_col = 2, pops = "AFR",
lead_snp = "rs4576508",
auto_snp = FALSE),
LD)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- mirrorPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
plotby = "coord",
start_plot = tempSTART, end_plot = tempEND,
label_lead = TRUE)
print(p1)
p2 <- scatterPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
region_start = tempSTART,
region_end = tempEND,
ld_df = 1,
label = TRUE)
print(p2)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.png"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.pdf"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.eps"), plot = p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.png"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.pdf"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.eps"), plot = p2)
rm(temp1, temp2,
p1,p2,
temp_f1, temp_f_ld1,
temp_f2, temp_f_ld2,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs4977575.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4977575...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=EUR&r2_d=r2&token=c0f613f149ab"
Downloaded 8868 bytes...Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4576508...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4576508&pop=AFR&r2_d=r2&token=c0f613f149ab"
Downloaded 16485 bytes...Merging input association data with LD...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Saving image for rs4977575.
Saving image for rs4977575.
We want to quantify the overlap.
Setting up an output directory for coloc.
ifelse(!dir.exists(file.path(PROJECT_loc, "/COLOC_EA_vs_AA")),
dir.create(file.path(PROJECT_loc, "/COLOC_EA_vs_AA")),
FALSE)[1] FALSE
COLOC_EA_vs_AA_loc = paste0(PROJECT_loc,"/COLOC_EA_vs_AA")Preparing data for coloc: we require beta and standard
errors for coloc.
First the European ancestry data, next the African-American ancestry data.
# EA
# gwas_sumstats_EA <- fread(paste0(GWAS_loc,"/EA/CAC1000G_EA_FINAL_FUMA.txt.gz"),
# showProgress = TRUE)
# names(gwas_sumstats_EA)[names(gwas_sumstats_EA) == "Pos"] <- "Position"
# saveRDS(gwas_sumstats_EA, file = paste0(OUT_loc, "/gwas_sumstats.EA.rds"))
# gwas_sumstats_EA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats.EA.rds"))
# gwas_sumstats_coloc_EA <- subset(gwas_sumstats_EA,
# select = c("MarkerName", "rsID", "Chr", "Position",
# "Effect", "StdErr",
# "Pvalue",
# "N"))
# rm(gwas_sumstats_EA)
# saveRDS(gwas_sumstats_coloc_EA, file = paste0(OUT_loc, "/gwas_sumstats_coloc.EA.rds"))
# gwas_sumstats_coloc_EA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_coloc.EA.rds"))
# AA
# gwas_sumstats_AA <- fread(paste0(GWAS_loc,"/AA/CAC1000G_AA_FINAL_FUMA.txt.gz"),
# showProgress = TRUE)
# names(gwas_sumstats_AA)[names(gwas_sumstats_EA) == "Pos"] <- "Position"
# saveRDS(gwas_sumstats_AA, file = paste0(OUT_loc, "/gwas_sumstats.AA.rds"))
gwas_sumstats_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats.AA.rds"))
names(gwas_sumstats_AA)[names(gwas_sumstats_EA) == "SE"] <- "StdErr"Error: object 'gwas_sumstats_EA' not found
Now we are reading to formally test the colocalization per trait.
Note that trait 1 = European ancestry; trait 2
= African-American ancestry.
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,18]
tempEND <- subset(variant_list, rsID == VARIANT)[,19]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp1 <- subset(gwas_sumstats_coloc_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
temp2 <- subset(gwas_sumstats_coloc_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nCheck temp1 data.\n")
temp1 <- rename_with(temp1, tolower)
# correcting column names
temp1 <- rename(temp1, beta = effect)
temp1 <- rename(temp1, se = stderr)
temp1 <- rename(temp1, snp = rsid)
# calculating things
temp1$varbeta <- temp1$se^2
temp1 <- as.list(temp1) # critical, as coloc expects a list of variables
temp1$type <- "quant"
temp1$sdY <- 1
coloc::check_dataset(temp1, warn.minp = 1e-10)
cat("\nCheck temp2 data.\n")
temp2 <- rename_with(temp2, tolower)
# correcting column names
temp2 <- rename(temp2, beta = effect)
temp2 <- rename(temp2, se = stderr)
temp2 <- rename(temp2, snp = rsid)
# calculating things
temp2$varbeta <- temp2$se^2
temp2 <- as.list(temp2) # critical, as coloc expects a list of variables
temp2$type <- "quant"
temp2$sdY <- 1
coloc::check_dataset(temp2, warn.minp = 1e-10)
cat("\nPlot required data.\n")
plot_dataset(temp1)
plot_dataset(temp2)
res_temp1_vs_temp2_single <- coloc.abf(dataset1 = temp1,
dataset2 = temp2)
cat("\nColocalization.\n")
print(res_temp1_vs_temp2_single)
write_lines(res_temp1_vs_temp2_single, file = paste0(COLOC_EA_vs_AA_loc, "/res_EA_vs_AA_single.",
tempVARIANTnr,".",tempCHR,"_",tempSTART,"_",tempEND,".txt"))
coloc::sensitivity(res_temp1_vs_temp2_single, "H4 > 0.9")
# Step 1: Call the pdf command to start the plot
pdf(file = paste0(COLOC_EA_vs_AA_loc, "/res_EA_vs_AA_single.",
tempVARIANTnr,".",tempCHR,"_",tempSTART,"_",tempEND,".pdf")) # The directory you want to save the file in
# Step 2: Create the plot with R code
coloc::sensitivity(res_temp1_vs_temp2_single, "H4 > 0.9")
# Step 3: Run dev.off() to create the file!
dev.off()
rm(temp1, temp2,
# p1,p2,
# temp_f1, temp_f_ld1,
# temp_f2, temp_f_ld2,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Check temp1 data.
Check temp2 data.
Warning: minimum p value is: 0.0010709
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning: minimum p value is: 0.0010709
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
2.64e-21 4.40e-01 1.86e-22 3.04e-02 5.30e-01
[1] "PP abf for shared variant: 53%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7610836 0.1082 0.1082 0.01169642 0.01082
Posterior
nsnps H0 H1 H2 H3 H4
1.082000e+03 2.643499e-21 4.401108e-01 1.857062e-22 3.038835e-02 5.295009e-01
Results fail decision rule H4 > 0.9
Getting data for rs3844006.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 3.5548e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.Warning: minimum p value is: 9.5528e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.21300 0.06910 0.02150 0.00628 0.69000
[1] "PP abf for shared variant: 69%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.8049601 0.0891 0.0891 0.0079299 0.00891
Posterior
nsnps H0 H1 H2 H3 H4
8.910000e+02 2.127262e-01 6.907437e-02 2.147238e-02 6.281855e-03 6.904452e-01
Results fail decision rule H4 > 0.9
Getting data for rs2854746.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 0.00072736
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.5990 0.1080 0.0686 0.0122 0.2120
[1] "PP abf for shared variant: 21.2%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7459854 0.1147 0.1147 0.01314462 0.01147
Posterior
nsnps H0 H1 H2 H3 H4
1.147000e+03 5.987696e-01 1.080300e-01 6.855817e-02 1.215678e-02 2.124855e-01
Results fail decision rule H4 > 0.9
Getting data for rs4977575.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 0.00077724
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
3.82e-38 4.04e-01 3.70e-39 3.86e-02 5.58e-01
[1] "PP abf for shared variant: 55.8%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7394555 0.1175 0.1175 0.0137945 0.01175
Posterior
nsnps H0 H1 H2 H3 H4
1.175000e+03 3.819485e-38 4.037077e-01 3.703500e-39 3.858714e-02 5.577051e-01
Results fail decision rule H4 > 0.9
Getting data for rs10899970.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 0.00063041
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.00135 0.87000 0.00013 0.08340 0.04480
[1] "PP abf for shared variant: 4.48%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7350156 0.1194 0.1194 0.01424442 0.01194
Posterior
nsnps H0 H1 H2 H3 H4
1.194000e+03 1.354841e-03 8.703261e-01 1.298830e-04 8.338978e-02 4.479944e-02
Results fail decision rule H4 > 0.9
Getting data for rs9633535.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 1.7844e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.1540 0.5240 0.0155 0.0527 0.2540
[1] "PP abf for shared variant: 25.4%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.8188193 0.083 0.083 0.0068807 0.0083
Posterior
nsnps H0 H1 H2 H3 H4
830.00000000 0.15368622 0.52399743 0.01553863 0.05272534 0.25405239
Results fail decision rule H4 > 0.9
Getting data for rs10762577.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 1.2432e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.Warning: minimum p value is: 0.00073363
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.06080 0.85200 0.00167 0.02340 0.06210
[1] "PP abf for shared variant: 6.21%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.9440809 0.0263 0.0263 0.00068906 0.00263
Posterior
nsnps H0 H1 H2 H3 H4
2.630000e+02 6.077064e-02 8.521307e-01 1.669798e-03 2.335196e-02 6.207694e-02
Results fail decision rule H4 > 0.9
Getting data for rs11063120.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 3.7022e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.00446 0.70700 0.00105 0.16600 0.12200
[1] "PP abf for shared variant: 12.2%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7992586 0.0916 0.0916 0.0083814 0.00916
Posterior
nsnps H0 H1 H2 H3 H4
9.160000e+02 4.464724e-03 7.069361e-01 1.047960e-03 1.658103e-01 1.217409e-01
Results fail decision rule H4 > 0.9
Getting data for rs9515203.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 0.0005347
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.1080 0.7390 0.0142 0.0972 0.0417
[1] "PP abf for shared variant: 4.17%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.686176 0.1401 0.1401 0.019614 0.01401
Posterior
nsnps H0 H1 H2 H3 H4
1.401000e+03 1.078718e-01 7.390042e-01 1.419994e-02 9.723877e-02 4.168536e-02
Results fail decision rule H4 > 0.9
Getting data for rs7182103.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 5.9197e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
9.93e-05 8.11e-01 1.82e-05 1.49e-01 3.97e-02
[1] "PP abf for shared variant: 3.97%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7836873 0.0984 0.0984 0.00967272 0.00984
Posterior
nsnps H0 H1 H2 H3 H4
9.840000e+02 9.928622e-05 8.113958e-01 1.820615e-05 1.487462e-01 3.974052e-02
Results fail decision rule H4 > 0.9
Getting data for rs7412.
Subset required data.
Check temp1 data.
Check temp2 data.
Plot required data.
Warning: minimum p value is: 2.9412e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.01090 0.64900 0.00402 0.23800 0.09770
[1] "PP abf for shared variant: 9.77%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7896519 0.0958 0.0958 0.00916806 0.00958
Posterior
nsnps H0 H1 H2 H3 H4
9.580000e+02 1.094517e-02 6.491241e-01 4.018346e-03 2.382180e-01 9.769439e-02
Results fail decision rule H4 > 0.9
ENPP1/ENPP3 locus
File: res_EA_vs_AA_single.1.6_131595002_132595002.txt
c(nsnps = 891, PP.H0.abf = 0.212726180436945, PP.H1.abf = 0.0690743673677815, PP.H2.abf = 0.0214723764600455, PP.H3.abf = 0.00628185511784578, PP.H4.abf = 0.690445220617383)
Conclusion: 69.0% probability that the locus is shared between both ancestries and includes the same causal variant.
IGFBP3 locus
File: res_EA_vs_AA_single.2.7_45460645_46460645.txt
c(nsnps = 1147, PP.H0.abf = 0.598769555553722, PP.H1.abf = 0.108030020950978, PP.H2.abf = 0.0685581712636257, PP.H3.abf = 0.0121567818199419, PP.H4.abf = 0.212485470411732)
Conclusion: 59.9% probability that the locus is not associated in either ancestry, but 10.8 that it is European-specific, and 21.2% that it is shared between both ancestries and includes the same causal variant.
CXCL12 locus
File: res_EA_vs_AA_single.3.10_44015716_45334720.txt
c(nsnps = 1641, PP.H0.abf = 0.00131956785769608, PP.H1.abf = 0.847689205708678, PP.H2.abf = 0.000166925872986017, PP.H3.abf = 0.107189395862512, PP.H4.abf = 0.0436349046981281)
Conclusion: 84.8% probability that the locus is European-specific, but 4.3-10.7% probability that the locus is shared between both ancestries.
ARID5B locus
File: res_EA_vs_AA_single.4.10_63336088_64336088.txt
c(nsnps = 830, PP.H0.abf = 0.153686217856268, PP.H1.abf = 0.52399742579072, PP.H2.abf = 0.0155386299541433, PP.H3.abf = 0.0527253377600635, PP.H4.abf = 0.254052388638805)
Conclusion: 52.4% probability that the locus is European-specific, but 25.4% probability that the locus is shared between both ancestries and includes different causal variants.
ADK locus
File: res_EA_vs_AA_single.5.10_75417431_76417431.txt
c(nsnps = 263, PP.H0.abf = 0.0607706367611815, PP.H1.abf = 0.85213066483438, PP.H2.abf = 0.00166979761323014, PP.H3.abf = 0.0233519569734565, PP.H4.abf = 0.0620769438177522)
Conclusion: 85.2% probability that the locus is European-specific, but 2.3-6.2% probability that the locus is shared between both ancestries.
FGF23 locus
File: res_EA_vs_AA_single.6.12_3986618_4986618.txt
c(nsnps = 916, PP.H0.abf = 0.00446472426354749, PP.H1.abf = 0.706936091086602, PP.H2.abf = 0.00104796032447378, PP.H3.abf = 0.165810337258493, PP.H4.abf = 0.121740887066883)
Conclusion: 70.7% probability that the locus is European-specific, but 12.2-16.6% probability that the locus is shared between both ancestries.
COL4A1/COL4A2 locus
File: res_EA_vs_AA_single.7.13_110549623_111549623.txt
c(nsnps = 1401, PP.H0.abf = 0.10787176608552, PP.H1.abf = 0.739004164794562, PP.H2.abf = 0.0141999398787617, PP.H3.abf = 0.0972387717170162, PP.H4.abf = 0.0416853575241402)
Conclusion: 73.9% probability that the locus is European-specific, but 4.1-9.7% probability that the locus is shared between both ancestries.
MORF4L locus
File: res_EA_vs_AA_single.8.15_78623946_79623946.txt
c(nsnps = 984, PP.H0.abf = 9.92862156724414e-05, PP.H1.abf = 0.811395804353311, PP.H2.abf = 1.82061468585998e-05, PP.H3.abf = 0.148746181818894, PP.H4.abf = 0.0397405214652622)
Conclusion: 81.1% probability that the locus is European-specific, but 14.9% probability that the locus is shared between both ancestries and includes different causal variants.
PHACTR1 locus
File: res_EA_vs_AA_single.9.6_12403957_13403957.txt
c(nsnps = 1082, PP.H0.abf = 2.64349916808974e-21, PP.H1.abf = 0.440110764390371, PP.H2.abf = 1.85706249346447e-22, PP.H3.abf = 0.0303883523691947, PP.H4.abf = 0.529500883240437)
Conclusion: 44.0% probability that the locus is European-specific, but 52.9% probability that the locus is shared between both ancestries and includes the same causal variant.
CDKN2A/CDKN2B locus
File: res_EA_vs_AA_single.10.9_21624744_22624744.txt
c(nsnps = 1175, PP.H0.abf = 3.8194848005248e-38, PP.H1.abf = 0.40370772152903, PP.H2.abf = 3.70349960775562e-39, PP.H3.abf = 0.0385871394254695, PP.H4.abf = 0.557705139045494)
Conclusion: 40.4% probability that the locus is European-specific, but 55.8% probability that the locus is shared between both ancestries and includes the same causal variant.
APOE locus
File: res_EA_vs_AA_single.11.19_44912079_45912079.txt
c(nsnps = 958, PP.H0.abf = 0.0109451663563229, PP.H1.abf = 0.649124092491782, PP.H2.abf = 0.00401834569685491, PP.H3.abf = 0.238218007619045, PP.H4.abf = 0.0976943878359947)
Conclusion: 64.9% probability that the locus is European-specific, but 23.8% probability that the locus is shared between both ancestries and includes different causal variants.
Overarching conclusions:
rm(gwas_sumstats_coloc_AA,
gwas_sumstats_racer_AA,
gwas_sumstats_coloc_EA,
gwas_sumstats_racer_AA)Warning: object 'gwas_sumstats_racer_AA' not found
Version: v1.5.0
Last update: 2023-05-04
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to create plot regional association plots.
Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan
Changes log
* v1.5.0 Fixed an issue with LD r2 plotting.
* v1.4.2 Added formal quantification of colocalization between ancestries.
* v1.4.1 Added mirror plots and scatter plots.
* v1.4.0 Update with AA data.
* v1.3.0 Added the credible sets to the aditional regions.
* v1.2.0 Added in aditional regions.
* v1.1.0 Created PNG and PDF of top loci regions.
* v1.0.0 Initial version.
sessionInfo()R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin22.4.0 (64-bit)
Running under: macOS Ventura 13.4
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /usr/local/Cellar/r/4.3.0_1/lib/R/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Amsterdam
tzcode source: internal
attached base packages:
[1] grid tools stats graphics grDevices utils datasets methods base
other attached packages:
[1] coloc_5.2.1 remotes_2.4.2 scales_1.2.1 ggsci_3.0.0 ggrepel_0.9.3 RACER_1.0.0 credentials_1.3.2
[8] UpSetR_1.4.0 ggpubr_0.6.0 forestplot_3.1.1 abind_1.4-5 checkmate_2.2.0 pheatmap_1.0.12 devtools_2.4.5
[15] usethis_2.1.6 BlandAltmanLeh_0.3.1 openxlsx_4.2.5.2 tableone_0.13.2 haven_2.5.2 eeptools_1.2.4 DT_0.27
[22] knitr_1.42 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 purrr_1.0.1 tibble_3.2.1 ggplot2_3.4.2
[29] tidyverse_2.0.0 data.table_1.14.8 naniar_1.0.0 tidyr_1.3.0 dplyr_1.1.2 optparse_1.7.3 readr_2.1.4
[36] pander_0.6.5 R.utils_2.12.2 R.oo_1.25.0 R.methodsS3_1.8.2
loaded via a namespace (and not attached):
[1] vcd_1.4-11 RColorBrewer_1.1-3 jsonlite_1.8.4 sys_3.4.1 rstudioapi_0.14 magrittr_2.0.3 farver_2.1.1 nloptr_2.0.3
[9] rmarkdown_2.21 ragg_1.2.5 fs_1.6.2 vctrs_0.6.2 memoise_2.0.1 minqa_1.2.5 askpass_1.1 rstatix_0.7.2
[17] mixsqp_0.3-48 htmltools_0.5.5 curl_5.0.0 broom_1.0.4 survey_4.2-1 sass_0.4.5 bslib_0.4.2 htmlwidgets_1.6.2
[25] plyr_1.8.8 zoo_1.8-12 cachem_1.0.8 maptools_1.1-6 mime_0.12 lifecycle_1.0.3 pkgconfig_2.0.3 Matrix_1.5-4
[33] R6_2.5.1 fastmap_1.1.1 shiny_1.7.4 digest_0.6.31 colorspace_2.1-0 reshape_0.8.9 ps_1.7.5 irlba_2.3.5.1
[41] pkgload_1.3.2 crosstalk_1.2.0 textshaping_0.3.6 labeling_0.4.2 fansi_1.0.4 timechange_0.2.0 compiler_4.3.0 bit64_4.0.5
[49] withr_2.5.0 backports_1.4.1 carData_3.0-5 viridis_0.6.3 DBI_1.1.3 pkgbuild_1.4.0 ggsignif_0.6.4 MASS_7.3-59
[57] openssl_2.0.6 sessioninfo_1.2.2 foreign_0.8-84 lmtest_0.9-40 zip_2.3.0 httpuv_1.6.9 visdat_0.6.0 glue_1.6.2
[65] callr_3.7.3 nlme_3.1-162 promises_1.2.0.1 reshape2_1.4.4 generics_0.1.3 gtable_0.3.3 tzdb_0.3.0 susieR_0.12.35
[73] hms_1.1.3 car_3.1-2 sp_1.6-0 utf8_1.2.3 pillar_1.9.0 vroom_1.6.3 later_1.3.1 mitools_2.4
[81] splines_4.3.0 getopt_1.20.3 lattice_0.21-8 bit_4.0.5 survival_3.5-5 tidyselect_1.2.0 miniUI_0.1.1.1 gridExtra_2.3
[89] xfun_0.39 arm_1.13-1 matrixStats_0.63.0 stringi_1.7.12 yaml_2.3.7 boot_1.3-28.1 evaluate_0.20 cli_3.6.1
[97] systemfonts_1.0.4 xtable_1.8-4 jquerylib_0.1.4 munsell_0.5.0 processx_3.8.1 Rcpp_1.0.10 coda_0.19-4 parallel_4.3.0
[105] ellipsis_0.3.2 prettyunits_1.1.1 profvis_0.3.8 urlchecker_1.0.1 lme4_1.1-33 viridisLite_0.4.2 crayon_1.5.2 rlang_1.1.1
[113] cowplot_1.1.1
save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".RegionalAssociationPlots.RData"))| © 1979-2023 Sander W. van der Laan | s.w.vanderlaan[at]gmail.com | swvanderlaan.github.io. |